In [1]:
import pandas as pd
from transformation import transformer
import numpy as np
import plotly.express as px
In [2]:
df = pd.read_csv('OD_matrix_201803.csv',encoding='unicode_escape')
In [3]:
df.plot.scatter('X_start','Y_start')
Out[3]:
<AxesSubplot:xlabel='X_start', ylabel='Y_start'>

It is similar to the map of Estonia, but the projection is a bit weird. At least it is not 4326 nor 3857. It is in the local projection of 3300.

In [4]:
df
Out[4]:
KANT_start KANT_end start_kant_id end_kant_id X_start Y_start X_end Y_end route_id Population RegularMovers
0 Aakre Valga Aakre Valga 1 1 627213.008799 6.440209e+06 627213.008799 6.440209e+06 1697 470 96
1 Aakre Valga Annikoru Tartu 1 25 627213.008799 6.440209e+06 634298.193717 6.461856e+06 21178 470 1
2 Aakre Valga Elva linn 1 49 627213.008799 6.440209e+06 642136.014361 6.456563e+06 41506 470 12
3 Aakre Valga Elva ümbruse kant Tartu 1 50 627213.008799 6.440209e+06 639265.501560 6.453032e+06 42353 470 1
4 Aakre Valga Hellenurme Valga 1 81 627213.008799 6.440209e+06 640178.641901 6.447022e+06 68610 470 2
... ... ... ... ... ... ... ... ... ... ... ...
28504 Ülenurme Tartu Võnnu Tartu 839 810 659202.938891 6.468463e+06 681155.798401 6.464204e+06 686062 4959 6
28505 Ülenurme Tartu Võru linn 839 812 659202.938891 6.468463e+06 678417.528347 6.414736e+06 687756 4959 6
28506 Ülenurme Tartu Võõpste Tartu 839 816 659202.938891 6.468463e+06 683260.892473 6.473663e+06 691144 4959 1
28507 Ülenurme Tartu Välgi Tartu 839 823 659202.938891 6.468463e+06 669844.459691 6.497459e+06 697073 4959 1
28508 Ülenurme Tartu Ülenurme Tartu 839 839 659202.938891 6.468463e+06 659202.938891 6.468463e+06 710625 4959 2073

28509 rows × 11 columns

In [5]:
len(df.KANT_start.unique())
Out[5]:
845
In [6]:
len(df.KANT_end.unique())
Out[6]:
844
In [7]:
set(df.KANT_start.unique()) - set(df.KANT_end.unique())
Out[7]:
{'Vilsandi Saare'}
In [8]:
set(df.KANT_end.unique()) - set(df.KANT_start.unique())
Out[8]:
set()

Then, the KANT_start.unique() has all the locaions we need to transform their projection.

In [9]:
start = df.groupby('KANT_start').first()
In [10]:
start
Out[10]:
KANT_end start_kant_id end_kant_id X_start Y_start X_end Y_end route_id Population RegularMovers
KANT_start
Aakre Valga Aakre Valga 1 1 627213.008799 6.440209e+06 627213.008799 6.440209e+06 1697 470 96
Aardla Tartu Aardla Tartu 2 2 664206.080560 6.466176e+06 664206.080560 6.466176e+06 2545 1087 239
Aarna Põlva Aarna Põlva 3 3 674944.509786 6.441704e+06 674944.509786 6.441704e+06 3393 243 19
Aaspere Lääne-Viru Aaspere Lääne-Viru 4 4 620838.092143 6.592462e+06 620838.092143 6.592462e+06 4241 441 173
Abja Viljandi Abja-Paluoja linn 6 5 579178.093070 6.441166e+06 579742.050531 6.444023e+06 5090 564 154
... ... ... ... ... ... ... ... ... ... ...
Ääsmäe Harju Aruküla Harju 837 34 529187.870955 6.566963e+06 561079.126772 6.580363e+06 29635 1266 2
Õisu Viljandi Abja-Paluoja linn 832 5 588943.147572 6.452916e+06 579742.050531 6.444023e+06 5914 455 19
Õru Valga Aakre Valga 833 1 627647.829537 6.423045e+06 627213.008799 6.440209e+06 2527 354 1
Üdruma Lääne Haapsalu linn 838 61 503259.157896 6.517914e+06 473490.460954 6.532856e+06 52505 133 9
Ülenurme Tartu Aakre Valga 839 1 659202.938891 6.468463e+06 627213.008799 6.440209e+06 2533 4959 1

845 rows × 10 columns

In [11]:
start_locations  = list(map(transformer,start.X_start,start.Y_start))
In [12]:
start_locations_array = np.array(start_locations)
start_locations_array
lngs = start_locations_array[:,0]
lats = start_locations_array[:,1]
In [13]:
start_points = dict(zip(start.index.values,start_locations))
In [14]:
stdf = pd.DataFrame(zip(start.index.values,lngs,lats),columns=['name','lng','lat'])
stdf
Out[14]:
name lng lat
0 Aakre Valga 26.156788 58.084661
1 Aardla Tartu 26.801715 58.305377
2 Aarna Põlva 26.966251 58.081711
3 Aaspere Lääne-Viru 26.130749 59.452678
4 Abja Viljandi 25.342999 58.104500
... ... ... ...
840 Ääsmäe Harju 24.511392 59.240014
841 Õisu Viljandi 25.513075 58.208101
842 Õru Valga 26.154810 57.930509
843 Üdruma Lääne 24.056365 58.800669
844 Ülenurme Tartu 26.718025 58.327739

845 rows × 3 columns

In [ ]:
 

Let's focus to the capital city of Estonia, Tallinn and show all of the connections in that.¶

The coordinate should be in this box:
24.9118,59.4768;24.5989,59.3643

In [15]:
tallinn = stdf[(stdf.lng < 24.9118 ) & (stdf.lng > 24.5989 ) & (stdf.lat < 59.4768) & (stdf.lat > 59.3643)]
In [16]:
df
Out[16]:
KANT_start KANT_end start_kant_id end_kant_id X_start Y_start X_end Y_end route_id Population RegularMovers
0 Aakre Valga Aakre Valga 1 1 627213.008799 6.440209e+06 627213.008799 6.440209e+06 1697 470 96
1 Aakre Valga Annikoru Tartu 1 25 627213.008799 6.440209e+06 634298.193717 6.461856e+06 21178 470 1
2 Aakre Valga Elva linn 1 49 627213.008799 6.440209e+06 642136.014361 6.456563e+06 41506 470 12
3 Aakre Valga Elva ümbruse kant Tartu 1 50 627213.008799 6.440209e+06 639265.501560 6.453032e+06 42353 470 1
4 Aakre Valga Hellenurme Valga 1 81 627213.008799 6.440209e+06 640178.641901 6.447022e+06 68610 470 2
... ... ... ... ... ... ... ... ... ... ... ...
28504 Ülenurme Tartu Võnnu Tartu 839 810 659202.938891 6.468463e+06 681155.798401 6.464204e+06 686062 4959 6
28505 Ülenurme Tartu Võru linn 839 812 659202.938891 6.468463e+06 678417.528347 6.414736e+06 687756 4959 6
28506 Ülenurme Tartu Võõpste Tartu 839 816 659202.938891 6.468463e+06 683260.892473 6.473663e+06 691144 4959 1
28507 Ülenurme Tartu Välgi Tartu 839 823 659202.938891 6.468463e+06 669844.459691 6.497459e+06 697073 4959 1
28508 Ülenurme Tartu Ülenurme Tartu 839 839 659202.938891 6.468463e+06 659202.938891 6.468463e+06 710625 4959 2073

28509 rows × 11 columns

In [17]:
sample_od = df[(df.KANT_start.isin(tallinn.name)) & (df.KANT_end.isin(tallinn.name))]
sample_od
Out[17]:
KANT_start KANT_end start_kant_id end_kant_id X_start Y_start X_end Y_end route_id Population RegularMovers
1857 Haabersti linnaosa Haabersti linnaosa 840 840 535384.478081 6.587643e+06 535384.478081 6.587643e+06 711473 44344 23355
1880 Haabersti linnaosa Kesklinna linnaosa 840 845 535384.478081 6.587643e+06 543405.459220 6.588251e+06 715708 44344 6609
1896 Haabersti linnaosa Kristiine linnaosa 840 841 535384.478081 6.587643e+06 540384.878201 6.586570e+06 712320 44344 2841
1911 Haabersti linnaosa Lasnamäe linnaosa 840 847 535384.478081 6.587643e+06 548341.135208 6.588755e+06 717402 44344 1931
1924 Haabersti linnaosa Mustamäe linnaosa 840 842 535384.478081 6.587643e+06 538422.923591 6.585222e+06 713167 44344 3304
... ... ... ... ... ... ... ... ... ... ... ...
17477 Põhja-Tallinna linnaosa Mustamäe linnaosa 844 842 539835.733135 6.590848e+06 538422.923591 6.585222e+06 713171 58057 2180
17485 Põhja-Tallinna linnaosa Nõmme linnaosa 844 843 539835.733135 6.590848e+06 538867.807140 6.582028e+06 714018 58057 768
17497 Põhja-Tallinna linnaosa Peetri Harju 844 480 539835.733135 6.590848e+06 547720.494583 6.583147e+06 406557 58057 445
17500 Põhja-Tallinna linnaosa Pirita linnaosa 844 846 539835.733135 6.590848e+06 548907.034349 6.593027e+06 716559 58057 343
17504 Põhja-Tallinna linnaosa Põhja-Tallinna linnaosa 844 844 539835.733135 6.590848e+06 539835.733135 6.590848e+06 714865 58057 33294

81 rows × 11 columns

In [ ]:
 
In [ ]:
 
In [18]:
def substiute(st,nd):
    st_lng = start_points[st][0]
    st_lat = start_points[st][1]
    nd_lng = start_points[nd][0]
    nd_lat = start_points[nd][1]

    return st_lng, nd_lng, st_lat, nd_lat
In [19]:
sample_od.KANT_end
Out[19]:
1857          Haabersti linnaosa
1880          Kesklinna linnaosa
1896          Kristiine linnaosa
1911           Lasnamäe linnaosa
1924           Mustamäe linnaosa
                  ...           
17477          Mustamäe linnaosa
17485             Nõmme linnaosa
17497               Peetri Harju
17500            Pirita linnaosa
17504    Põhja-Tallinna linnaosa
Name: KANT_end, Length: 81, dtype: object
In [20]:
ndf = pd.DataFrame(map(substiute,sample_od.KANT_start,sample_od.KANT_end),columns=['start_lon','end_lon','start_lat','end_lat'])
ndf['start'] = sample_od.KANT_start.values
ndf['end'] = sample_od.KANT_end.values
ndf
Out[20]:
start_lon end_lon start_lat end_lat start end
0 24.623326 24.623326 59.425178 59.425178 Haabersti linnaosa Haabersti linnaosa
1 24.623326 24.764738 59.425178 59.429887 Haabersti linnaosa Kesklinna linnaosa
2 24.623326 24.711209 59.425178 59.415103 Haabersti linnaosa Kristiine linnaosa
3 24.623326 24.851803 59.425178 59.433880 Haabersti linnaosa Lasnamäe linnaosa
4 24.623326 24.676419 59.425178 59.403183 Haabersti linnaosa Mustamäe linnaosa
... ... ... ... ... ... ...
76 24.702327 24.676419 59.453550 59.403183 Põhja-Tallinna linnaosa Mustamäe linnaosa
77 24.702327 24.683678 59.453550 59.374473 Põhja-Tallinna linnaosa Nõmme linnaosa
78 24.702327 24.839631 59.453550 59.383614 Põhja-Tallinna linnaosa Peetri Harju
79 24.702327 24.862742 59.453550 59.472156 Põhja-Tallinna linnaosa Pirita linnaosa
80 24.702327 24.702327 59.453550 59.453550 Põhja-Tallinna linnaosa Põhja-Tallinna linnaosa

81 rows × 6 columns

In [21]:
alng = np.array(ndf.iloc[:,:2])
alat = np.array(ndf.iloc[:,2:4])
In [22]:
rlng = alng.reshape(-1)
rlat = alat.reshape(-1)
In [23]:
names = []
start_counts = ndf.groupby('start')['end'].count()
for c, name in zip(start_counts,start_counts.index):
    names.append([name] * 2*c)
In [24]:
from itertools import chain
names = list(chain(*names))
In [25]:
od_series = pd.DataFrame(zip(rlng,rlat,names),columns=['lon','lat','locations'])
In [26]:
fig = px.line_mapbox(od_series, lat="lat", lon="lon", color="locations", zoom=3, height=300)

fig.update_layout(mapbox_style="stamen-terrain", mapbox_zoom=10, mapbox_center_lat = 59.43550,
    margin={"r":0,"t":20,"l":0,"b":0},height=600, width=900,)

fig.show()
In [27]:
od_series.to_csv('Tallinn_od_serie.csv',index=False)
In [ ]: